Rapid projections for B-ALL data
library(Seurat)
Warning messages:
1: package ‘SeuratObject’ was built under R version 4.1.2
2: package ‘sp’ was built under R version 4.1.2
library(symphony)
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.1 ✔ purrr 0.3.4
✔ tibble 3.1.2 ✔ dplyr 1.0.7
✔ tidyr 1.1.3 ✔ stringr 1.4.0
✔ readr 1.4.0 ✔ forcats 0.5.1
Warning: package ‘ggplot2’ was built under R version 4.1.2── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
Bone Marrow Reference Map
# Directory that holds the bone marrow projection tools (helper script and
# serialized Symphony reference).
BM_projection_path <- '../../../../../../AMLhierarchies/scRNA_projection/Complete_Hematopoiesis/BM_V2only/Final_Embedding/Final_projections/ProjectionTools/'

# Run the utility script that ships alongside the reference.
source(paste0(BM_projection_path, 'Symphony_Utils_BMref.R'))

# Load the serialized bone marrow reference object.
BM_ref <- readRDS(paste0(BM_projection_path, 'BoneMarrow_RefMap_SymphonyRef.rds'))

# The uwot model path stored in the reference is prefixed with the tools
# directory so that it resolves from the current working directory.
BM_ref$save_uwot_path <- paste0(BM_projection_path, BM_ref$save_uwot_path)

# Build a Seurat object from the reference (used below for plotting).
ReferenceSeuratObj_BM <- CreateReferenceSeuratObj(BM_ref)
Warning: No assay specified, setting assay as RNA by default.Warning: Keys should be one or more alphanumeric characters followed by an underscore, setting key from umap to umap_Warning: All keys should be one or more alphanumeric characters followed by an underscore '_', setting key to umap_
# Show the reference UMAP, grouped and labelled by the formatted cell type annotation.
DimPlot(ReferenceSeuratObj_BM, reduction = 'umap', group.by = 'CellType_Annotation_formatted', raster=FALSE, label=TRUE, label.size = 4)
Cross-Ontogeny B-Development Map
# Directory that holds the B-development projection tools.
Bdev_projection_path <- 'BDevelopment_ProjectionTools/'

# Load the serialized B-development reference object.
Bdev_ref <- readRDS(paste0(Bdev_projection_path, 'BDevelopment_RefMap_SymphonyRef.rds'))

# The uwot model path stored in the reference is prefixed with the tools
# directory so that it resolves from the current working directory.
Bdev_ref$save_uwot_path <- paste0(Bdev_projection_path, Bdev_ref$save_uwot_path)
library(ggpointdensity)
library(viridis)
library(jcolors)
# Project one query sample onto the bone marrow reference and transfer labels.
#
# Arguments:
#   seurat_obj  Seurat object; the `counts` slot of its `RNA` assay and its
#               `meta.data` are handed to mapQuery().
#   batch_key   Forwarded to mapQuery() as `vars`.
#
# Reads the global `BM_ref`. Returns the object produced by knnPredict.Seurat().
map_sample_BM <- function(seurat_obj, batch_key) {
  # Step 1: project the query counts onto the reference.
  projected <- mapQuery(
    seurat_obj@assays$RNA@counts,
    seurat_obj@meta.data,
    BM_ref,
    vars = batch_key,
    return_type = 'Seurat'
  )

  # Step 2: score the mapping error against the same reference.
  scored <- calcMappingError(projected, reference = BM_ref, MAD.threshold = 2)

  # Step 3: transfer `CellType_Annotation` labels using k = 30 neighbours.
  labelled <- knnPredict.Seurat(
    scored,
    BM_ref,
    label_transfer = 'CellType_Annotation',
    k = 30
  )
  labelled
}
library(ggpointdensity)
library(jcolors)
# Draw the query cells of one sample over the reference UMAP and save the
# figure as a PDF.
#
# Arguments:
#   dat                   Projected Seurat object. Its `meta.data` must contain
#                         `mapping_error_QC` and `Sample`; the embeddings of its
#                         `umap` reduction supply `umap_1` / `umap_2`.
#   ptx_id                Sample label; used as the plot title and as the prefix
#                         of the output file name.
#   refUMAP               Data frame of reference coordinates (`umap_1`,
#                         `umap_2`) with `ref_query == 'reference'`. Required:
#                         the former default `refUMAP = refUMAP` was a recursive
#                         default argument and failed whenever it was relied on.
#   downsample_reference  If TRUE, only a random 25% of the reference rows are
#                         drawn as background.
#   save_folder           Output directory. It is concatenated with paste0(),
#                         so it must end with a path separator.
#
# Returns the value of ggsave(); called for the side effect of writing
# `<save_folder><ptx_id>_BoneMarrowReference_projectedUMAP.pdf`.
plot_projection <- function(dat, ptx_id, refUMAP, downsample_reference = TRUE, save_folder = 'BALL_projections/Complete_BoneMarrowReference/Figures/') {
  if (downsample_reference) {
    refUMAP <- refUMAP %>% sample_frac(0.25)
  }

  # Attach UMAP coordinates to the per-cell metadata, keep only cells that
  # passed the mapping-error QC, then stack the reference rows underneath.
  # The join key is explicit so an unrelated shared column can never be
  # picked up as a key.
  dat <- dat@meta.data %>%
    rownames_to_column('Cell') %>%
    left_join(
      dat@reductions$umap@cell.embeddings %>% data.frame() %>% rownames_to_column('Cell'),
      by = 'Cell'
    ) %>%
    filter(mapping_error_QC == 'Pass') %>%
    mutate(ref_query = 'query') %>%
    bind_rows(refUMAP)

  ## Get the Background
  background <- dat %>% filter(ref_query == 'reference') %>% select(-Sample)

  p <- dat %>%
    filter(ref_query == 'query') %>%
    ggplot(aes(x = umap_1, y = umap_2)) +
    geom_point(data = background, color='#E3E3E3', size=0.01, alpha=0.5) +
    geom_pointdensity(size=0.05) +
    scale_color_jcolors_contin("pal3", reverse = TRUE, bias = 1.75) +
    geom_density_2d(alpha=0.4, color='black', h = 1.5, size=0.4) +
    theme_void() + ggtitle(ptx_id) +
    theme(strip.text.x = element_text(size=18), legend.position='none')

  # Pass the plot explicitly instead of depending on ggplot2's last_plot().
  ggsave(paste0(save_folder, ptx_id, '_BoneMarrowReference_projectedUMAP.pdf'), plot = p, height = 4, width = 6)
}
# Append the per-cell bone marrow annotations of one sample to a running table.
#
# Arguments:
#   BALL            Projected Seurat object; its `meta.data` must contain the
#                   columns selected below.
#   composition_BM  Data frame accumulated so far (may be empty).
#
# Returns `composition_BM` with the rows of this sample bound underneath.
get_composition_BM <- function(BALL, composition_BM) {
  # Cell barcodes live in the row names; promote them to a column first.
  per_cell <- rownames_to_column(BALL@meta.data, 'barcode')
  per_cell <- select(
    per_cell,
    barcode, Sample, mapping_error_score, mapping_error_QC,
    CellType_Annotation, CellType_Annotation_prob
  )

  # Stack this sample below what has been collected so far.
  bind_rows(composition_BM, per_cell)
}
# Root directory of the raw data; each entry in it is treated as one sample.
basepath <- 'BALL_rawdata/Expanded/'
# Sub-path appended after the sample name to reach the filtered matrix.
matrixpath <- '/filtered_feature_bc_matrix/'
# Sample identifiers are the entries found under `basepath`.
BALL_patients <- list.files(basepath)
BALL_patients
[1] "SJALL040053_D1" "SJALL040066_D1" "SJALL040069_D1" "SJALL040070_D1" "SJALL040099_D1" "SJALL040100_D1" "SJALL040103_D1" "SJALL040119_D1"
[9] "SJALL040137" "SJALL040137_D1" "SJBALL004097_D2" "SJBALL006_D" "SJBALL006_R" "SJBALL014876_D1" "SJBALL021901_D1" "SJBALL021947_D1"
[17] "SJBALL021964_D1" "SJBALL021968_D1" "SJBALL021973_D1" "SJBALL022020_D1" "SJBALL022035_D1" "SJBALL022052_D1" "SJBALL030036_D1" "SJBALL030040_D1"
[25] "SJBALL030059_D1" "SJBALL030072_D1" "SJBALL030090_D1" "SJBALL030095_D1" "SJBALL030123_D1" "SJBALL030123_R1" "SJBALL030127_D1" "SJBALL030145_D1"
[33] "SJBALL030216_D1" "SJBALL030247_D1" "SJBALL030254_D1" "SJBALL030276_D1" "SJBALL030285_D1" "SJBALL030313_D1" "SJBALL030344_D1" "SJBALL030370_D1"
[41] "SJBALL030379_D1" "SJBALL030414_D1" "SJBALL030434_D1" "SJBALL030491_D1" "SJBALL030662_D1" "SJBALL030718_D1" "SJBALL030734_D1" "SJBALL030762_D1"
[49] "SJBALL030821_D1" "SJBALL030871_D1" "SJBALL030923_D1" "SJBALL030971_D1" "SJBALL030975_D1" "SJBALL031052_D1" "SJBALL031087_D1" "SJBALL031128_D1"
[57] "SJBALL031144_D1" "SJBALL031168_D1" "SJBALL031267_D1" "SJBALL031281_D1" "SJBALL081_D" "SJBALL087_D" "SJBALL104_D" "SJBALL113_D"
[65] "SJBALL182_D" "SJBALL182_R" "SJBALL211_D" "SJBALL243_D" "SJBALL255_D" "SJE2A063_D" "SJE2A066_D" "SJE2A067_D"
[73] "SJHYPER022017_D1" "SJHYPO021982_D1" "SJHYPO117_D" "SJHYPO120_D" "SJHYPO124_D" "SJHYPO143_D" "SJHYPO146_D" "SJINF022043_D"
[81] "SJMLL006_D" "SJMLL009_D" "SJPHALL004_D" "SJPHALL006_D" "SJPHALL007_D" "SJPHALL010_D" "SJPHALL020_D" "SJPHALL020_R"
[89] "SJPHALL021_D"
# Reference UMAP coordinates, tagged as 'reference' so they can be told apart
# from query cells when drawn as the plot background.
refUMAP <- mutate(
  rename(data.frame(BM_ref$umap$embedding), umap_1 = X1, umap_2 = X2),
  ref_query = 'reference'
)

# Running table of per-cell annotations, filled in by the loop below.
BALL_composition_BMref <- data.frame()

# Project every sample in `BALL_patients` onto the bone marrow reference.
for (pt_samp in BALL_patients) {
  print(paste0('Mapping: ', pt_samp))
  start_time <- Sys.time()

  # Read the 10x matrix of this sample and wrap it in a Seurat object.
  BALL <- CreateSeuratObject(Seurat::Read10X(paste0(basepath, pt_samp, matrixpath)))
  BALL$Sample <- pt_samp

  # QC: keep cells with >500 genes, >2500 counts and <8% mitochondrial counts.
  BALL <- Seurat::PercentageFeatureSet(BALL, pattern = '^MT-', col.name = 'pct.mito')
  BALL <- subset(BALL, nFeature_RNA > 500 & nCount_RNA > 2500 & pct.mito < 8)
  print(paste0('Post QC Cells: n = ', ncol(BALL)))

  # Project onto the reference and transfer labels; no batch variable is given.
  BALL <- map_sample_BM(BALL, NULL)

  # Save the projected-UMAP figure for this sample.
  plot_projection(
    BALL, pt_samp,
    refUMAP = refUMAP,
    downsample_reference = TRUE,
    save_folder = 'BALL_projections/Complete_BoneMarrowReference/Figures/'
  )

  # Collect the per-cell annotations.
  BALL_composition_BMref <- get_composition_BM(BALL, BALL_composition_BMref)

  # Persist the projected object, then drop it before the next iteration.
  saveRDS(BALL, paste0("BALL_projections/Complete_BoneMarrowReference/", pt_samp, "_projected.rds"))
  rm(BALL)

  end_time <- Sys.time()
  print(end_time - start_time)
  gc()
}
[1] "Mapping: SJALL040053_D1"
[1] "Post QC Cells: n = 4088"
Normalizing
Scaling and synchronizing query gene expression
Found 2360 reference variable genes in query dataset
Project query cells using reference gene loadings
Clustering query cells to reference centroids
Correcting query batch effects
UMAP
All done!
Warning: Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.SymphonyQuery.harmony; see ?make.names for more details on syntax validityJoining, by = "Cell"
Time difference of 1.814683 mins
[1] "Mapping: SJALL040066_D1"
[1] "Post QC Cells: n = 6880"
Time difference of 3.015293 mins
[1] "Mapping: SJALL040069_D1"
[1] "Post QC Cells: n = 8056"
Time difference of 3.585634 mins
[1] "Mapping: SJALL040070_D1"
[1] "Post QC Cells: n = 4486"
Time difference of 1.983575 mins
[1] "Mapping: SJALL040099_D1"
[1] "Post QC Cells: n = 5403"
Time difference of 2.30194 mins
[1] "Mapping: SJALL040100_D1"
[1] "Post QC Cells: n = 6897"
Time difference of 3.018985 mins
[1] "Mapping: SJALL040103_D1"
[1] "Post QC Cells: n = 4853"
Time difference of 2.030512 mins
[1] "Mapping: SJALL040119_D1"
[1] "Post QC Cells: n = 9313"
Time difference of 4.623792 mins
[1] "Mapping: SJALL040137"
[1] "Post QC Cells: n = 6202"
Time difference of 2.809618 mins
[1] "Mapping: SJALL040137_D1"
[1] "Post QC Cells: n = 6737"
Time difference of 2.995615 mins
[1] "Mapping: SJBALL004097_D2"
[1] "Post QC Cells: n = 2978"
Time difference of 1.398087 mins
[1] "Mapping: SJBALL006_D"
[1] "Post QC Cells: n = 7649"
Time difference of 3.345602 mins
[1] "Mapping: SJBALL006_R"
[1] "Post QC Cells: n = 4738"
Time difference of 2.025418 mins
[1] "Mapping: SJBALL014876_D1"
[1] "Post QC Cells: n = 1729"
Time difference of 52.1443 secs
[1] "Mapping: SJBALL021901_D1"
[1] "Post QC Cells: n = 4941"
Time difference of 2.19486 mins
[1] "Mapping: SJBALL021947_D1"
[1] "Post QC Cells: n = 5474"
Time difference of 2.438453 mins
[1] "Mapping: SJBALL021964_D1"
[1] "Post QC Cells: n = 5948"
Time difference of 2.73578 mins
[1] "Mapping: SJBALL021968_D1"
[1] "Post QC Cells: n = 6391"
Time difference of 3.032955 mins
[1] "Mapping: SJBALL021973_D1"
[1] "Post QC Cells: n = 6915"
Time difference of 2.859825 mins
[1] "Mapping: SJBALL022020_D1"
[1] "Post QC Cells: n = 3168"
Time difference of 1.37409 mins
[1] "Mapping: SJBALL022035_D1"
[1] "Post QC Cells: n = 2575"
Time difference of 1.058198 mins
[1] "Mapping: SJBALL022052_D1"
[1] "Post QC Cells: n = 6603"
Time difference of 2.758308 mins
[1] "Mapping: SJBALL030036_D1"
[1] "Post QC Cells: n = 3522"
Time difference of 1.578058 mins
[1] "Mapping: SJBALL030040_D1"
[1] "Post QC Cells: n = 10583"
Time difference of 4.171142 mins
[1] "Mapping: SJBALL030059_D1"
[1] "Post QC Cells: n = 4161"
Time difference of 1.926097 mins
[1] "Mapping: SJBALL030072_D1"
[1] "Post QC Cells: n = 8199"
Time difference of 3.30103 mins
[1] "Mapping: SJBALL030090_D1"
[1] "Post QC Cells: n = 5667"
Time difference of 2.344833 mins
[1] "Mapping: SJBALL030095_D1"
[1] "Post QC Cells: n = 6436"
Time difference of 2.756326 mins
[1] "Mapping: SJBALL030123_D1"
[1] "Post QC Cells: n = 5296"
Time difference of 2.15834 mins
[1] "Mapping: SJBALL030123_R1"
[1] "Post QC Cells: n = 7778"
Time difference of 3.493651 mins
[1] "Mapping: SJBALL030127_D1"
[1] "Post QC Cells: n = 6308"
Time difference of 2.64283 mins
[1] "Mapping: SJBALL030145_D1"
[1] "Post QC Cells: n = 5614"
Time difference of 2.476466 mins
[1] "Mapping: SJBALL030216_D1"
[1] "Post QC Cells: n = 7066"
Time difference of 3.031766 mins
[1] "Mapping: SJBALL030247_D1"
[1] "Post QC Cells: n = 6647"
Time difference of 2.890504 mins
[1] "Mapping: SJBALL030254_D1"
[1] "Post QC Cells: n = 6451"
Time difference of 2.824079 mins
[1] "Mapping: SJBALL030276_D1"
[1] "Post QC Cells: n = 5268"
Time difference of 2.464801 mins
[1] "Mapping: SJBALL030285_D1"
[1] "Post QC Cells: n = 7991"
Time difference of 3.867738 mins
[1] "Mapping: SJBALL030313_D1"
[1] "Post QC Cells: n = 7466"
Time difference of 3.088398 mins
[1] "Mapping: SJBALL030344_D1"
[1] "Post QC Cells: n = 7453"
Time difference of 3.17111 mins
[1] "Mapping: SJBALL030370_D1"
[1] "Post QC Cells: n = 5952"
Time difference of 2.771318 mins
[1] "Mapping: SJBALL030379_D1"
[1] "Post QC Cells: n = 5189"
Time difference of 2.124042 mins
[1] "Mapping: SJBALL030414_D1"
[1] "Post QC Cells: n = 4100"
Time difference of 1.736319 mins
[1] "Mapping: SJBALL030434_D1"
[1] "Post QC Cells: n = 1165"
Time difference of 33.57605 secs
[1] "Mapping: SJBALL030491_D1"
[1] "Post QC Cells: n = 6936"
Time difference of 2.941001 mins
[1] "Mapping: SJBALL030662_D1"
[1] "Post QC Cells: n = 6615"
Time difference of 2.813917 mins
[1] "Mapping: SJBALL030718_D1"
[1] "Post QC Cells: n = 7674"
Time difference of 3.267861 mins
[1] "Mapping: SJBALL030734_D1"
[1] "Post QC Cells: n = 8075"
Time difference of 3.471585 mins
[1] "Mapping: SJBALL030762_D1"
[1] "Post QC Cells: n = 4836"
Time difference of 2.008734 mins
[1] "Mapping: SJBALL030821_D1"
[1] "Post QC Cells: n = 2478"
Time difference of 1.044536 mins
[1] "Mapping: SJBALL030871_D1"
[1] "Post QC Cells: n = 5767"
Time difference of 2.340077 mins
[1] "Mapping: SJBALL030923_D1"
[1] "Post QC Cells: n = 6031"
Time difference of 2.555967 mins
[1] "Mapping: SJBALL030971_D1"
[1] "Post QC Cells: n = 6106"
Time difference of 2.603852 mins
[1] "Mapping: SJBALL030975_D1"
[1] "Post QC Cells: n = 7253"
Time difference of 3.045372 mins
[1] "Mapping: SJBALL031052_D1"
[1] "Post QC Cells: n = 5253"
Time difference of 2.107476 mins
[1] "Mapping: SJBALL031087_D1"
[1] "Post QC Cells: n = 4536"
Time difference of 1.793173 mins
[1] "Mapping: SJBALL031128_D1"
[1] "Post QC Cells: n = 8008"
Time difference of 3.273764 mins
[1] "Mapping: SJBALL031144_D1"
[1] "Post QC Cells: n = 7299"
Time difference of 2.859784 mins
[1] "Mapping: SJBALL031168_D1"
[1] "Post QC Cells: n = 6672"
Time difference of 2.69549 mins
[1] "Mapping: SJBALL031267_D1"
[1] "Post QC Cells: n = 5382"
Time difference of 2.050455 mins
[1] "Mapping: SJBALL031281_D1"
[1] "Post QC Cells: n = 5787"
Time difference of 2.220514 mins
[1] "Mapping: SJBALL081_D"
[1] "Post QC Cells: n = 8414"
Time difference of 3.501642 mins
[1] "Mapping: SJBALL087_D"
[1] "Post QC Cells: n = 7239"
Time difference of 2.984561 mins
[1] "Mapping: SJBALL104_D"
[1] "Post QC Cells: n = 6935"
Time difference of 2.87639 mins
[1] "Mapping: SJBALL113_D"
[1] "Post QC Cells: n = 5761"
Time difference of 2.378711 mins
[1] "Mapping: SJBALL182_D"
[1] "Post QC Cells: n = 5120"
Time difference of 3.214769 mins
[1] "Mapping: SJBALL182_R"
[1] "Post QC Cells: n = 7803"
Time difference of 3.481382 mins
[1] "Mapping: SJBALL211_D"
[1] "Post QC Cells: n = 4775"
Time difference of 2.162919 mins
[1] "Mapping: SJBALL243_D"
[1] "Post QC Cells: n = 5868"
Time difference of 2.538521 mins
[1] "Mapping: SJBALL255_D"
[1] "Post QC Cells: n = 6194"
Time difference of 2.721762 mins
[1] "Mapping: SJE2A063_D"
[1] "Post QC Cells: n = 6750"
Time difference of 2.643834 mins
[1] "Mapping: SJE2A066_D"
[1] "Post QC Cells: n = 7933"
Time difference of 3.38001 mins
[1] "Mapping: SJE2A067_D"
[1] "Post QC Cells: n = 3296"
Time difference of 1.357974 mins
[1] "Mapping: SJHYPER022017_D1"
[1] "Post QC Cells: n = 9613"
Time difference of 3.90506 mins
[1] "Mapping: SJHYPO021982_D1"
[1] "Post QC Cells: n = 9330"
Time difference of 3.718171 mins
[1] "Mapping: SJHYPO117_D"
[1] "Post QC Cells: n = 6565"
Time difference of 2.583308 mins
[1] "Mapping: SJHYPO120_D"
[1] "Post QC Cells: n = 6420"
Time difference of 2.829995 mins
[1] "Mapping: SJHYPO124_D"
[1] "Post QC Cells: n = 3810"
Time difference of 1.702335 mins
[1] "Mapping: SJHYPO143_D"
[1] "Post QC Cells: n = 2121"
Time difference of 1.297093 mins
[1] "Mapping: SJHYPO146_D"
[1] "Post QC Cells: n = 4816"
Time difference of 1.95948 mins
[1] "Mapping: SJINF022043_D"
[1] "Post QC Cells: n = 5772"
Time difference of 2.324233 mins
[1] "Mapping: SJMLL006_D"
[1] "Post QC Cells: n = 4585"
Time difference of 1.96375 mins
[1] "Mapping: SJMLL009_D"
[1] "Post QC Cells: n = 7172"
Time difference of 3.184118 mins
[1] "Mapping: SJPHALL004_D"
[1] "Post QC Cells: n = 6069"
Time difference of 2.494174 mins
[1] "Mapping: SJPHALL006_D"
[1] "Post QC Cells: n = 5805"
Time difference of 2.316203 mins
[1] "Mapping: SJPHALL007_D"
[1] "Post QC Cells: n = 7460"
Time difference of 3.054827 mins
[1] "Mapping: SJPHALL010_D"
[1] "Post QC Cells: n = 3136"
Time difference of 1.44603 mins
[1] "Mapping: SJPHALL020_D"
[1] "Post QC Cells: n = 8538"
Time difference of 3.60041 mins
[1] "Mapping: SJPHALL020_R"
[1] "Post QC Cells: n = 7582"
Time difference of 3.332086 mins
[1] "Mapping: SJPHALL021_D"
[1] "Post QC Cells: n = 6323"
Time difference of 2.660585 mins
# Write the per-cell annotations accumulated over all samples to CSV.
BALL_composition_BMref %>% write_csv('BALL_projections/Complete_BoneMarrowReference/BALL_celltype_annotations_FullReference.csv')
# Project one sample onto the B-development reference and transfer labels.
#
# Arguments:
#   seurat_obj  Seurat object; the `counts` slot of its `SymphonyQuery` assay
#               and its `meta.data` are handed to mapQuery().
#   batch_key   Forwarded to mapQuery() as `vars`.
#
# Reads the global `Bdev_ref`. Returns the object produced by
# knnPredict.Seurat(). No mapping-error scoring is done in this function.
map_sample_BDev <- function(seurat_obj, batch_key) {
  # Step 1: project the query counts onto the B-development reference.
  projected <- mapQuery(
    seurat_obj@assays$SymphonyQuery@counts,
    seurat_obj@meta.data,
    Bdev_ref,
    vars = batch_key,
    return_type = 'Seurat'
  )

  # Step 2: transfer `BDevelopment_CellType_Comprehensive` labels, k = 30.
  labelled <- knnPredict.Seurat(
    projected,
    Bdev_ref,
    label_transfer = 'BDevelopment_CellType_Comprehensive',
    k = 30
  )
  labelled
}
library(ggpointdensity)
library(jcolors)
# Draw the query cells of one sample over the B-development reference UMAP and
# save the figure as a PDF. The x axis is mirrored (`-umap_1`).
#
# Arguments:
#   dat                   Projected Seurat object. Its `meta.data` must contain
#                         `mapping_error_QC` and `Sample`; the embeddings of its
#                         `umap` reduction supply `umap_1` / `umap_2`.
#   ptx_id                Sample label; used as the plot title and as the prefix
#                         of the output file name.
#   refUMAP               Data frame of reference coordinates (`umap_1`,
#                         `umap_2`) with `ref_query == 'reference'`.
#   downsample_reference  If TRUE, a random 50% of the reference rows are drawn
#                         as background. This calls set.seed(123), which resets
#                         the session-wide RNG state as a side effect.
#   save_folder           Output directory. It is concatenated with paste0(),
#                         so it must end with a path separator.
#
# Returns the value of ggsave(); called for the side effect of writing
# `<save_folder><ptx_id>_BdevelopmentReference_projectedUMAP.pdf`.
plot_BDev_projection <- function(dat, ptx_id, refUMAP, downsample_reference = TRUE, save_folder = 'BALL_projections/Focused_BDevelopment/Figures/') {
  if (downsample_reference) {
    # Fixed seed so the same background subset is drawn for every sample.
    set.seed(123)
    refUMAP <- refUMAP %>% sample_frac(0.5)
  }

  # Attach UMAP coordinates to the per-cell metadata, keep only cells that
  # passed the mapping-error QC, then stack the reference rows underneath.
  # The join key is explicit so an unrelated shared column can never be
  # picked up as a key.
  dat <- dat@meta.data %>%
    rownames_to_column('Cell') %>%
    left_join(
      dat@reductions$umap@cell.embeddings %>% data.frame() %>% rownames_to_column('Cell'),
      by = 'Cell'
    ) %>%
    filter(mapping_error_QC == 'Pass') %>%
    mutate(ref_query = 'query') %>%
    bind_rows(refUMAP)

  ## Get the Background
  background <- dat %>% filter(ref_query == 'reference') %>% select(-Sample)

  p <- dat %>%
    filter(ref_query == 'query') %>%
    ggplot(aes(x = -umap_1, y = umap_2)) +
    geom_point(data = background, color='#E3E3E3', size=0.005, alpha=0.5) +
    geom_pointdensity(size=0.02) +
    scale_color_jcolors_contin("pal3", reverse = TRUE, bias = 1.75) +
    geom_density_2d(alpha=0.4, color='black', h = 1.5, size=0.3) +
    theme_void() + ggtitle(ptx_id) +
    theme(strip.text.x = element_text(size=18), legend.position='none')

  # Pass the plot explicitly instead of depending on ggplot2's last_plot().
  ggsave(paste0(save_folder, ptx_id, '_BdevelopmentReference_projectedUMAP.pdf'), plot = p, height = 4, width = 6)
}
# Append the per-cell UMAP coordinates of one sample to a running table.
#
# Arguments:
#   BALL              Projected Seurat object; `Sample` is read from its
#                     `meta.data` and the coordinates from the embeddings of
#                     its `umap` reduction.
#   umap_coordinates  Data frame accumulated so far (may be empty).
#
# Returns `umap_coordinates` with the rows of this sample bound underneath.
get_UMAPcoordinates <- function(BALL, umap_coordinates) {
  # Barcode and sample label for every cell.
  cell_ids <- select(rownames_to_column(BALL@meta.data, 'barcode'), barcode, Sample)

  # UMAP embedding with the barcode promoted from the row names.
  embedding <- rownames_to_column(
    data.frame(BALL@reductions$umap@cell.embeddings),
    'barcode'
  )

  # Natural join on the shared column(s), then stack below earlier samples.
  sample_coordinates <- left_join(cell_ids, embedding)
  bind_rows(umap_coordinates, sample_coordinates)
}
# Append the per-cell B-development annotations of one sample to a running table.
#
# Arguments:
#   BALL              Projected Seurat object; its `meta.data` must contain the
#                     columns selected below.
#   composition_Bdev  Data frame accumulated so far (may be empty).
#
# Returns `composition_Bdev` with the rows of this sample bound underneath.
get_composition_Bdev <- function(BALL, composition_Bdev) {
  # Cell barcodes live in the row names; promote them to a column first.
  per_cell <- rownames_to_column(BALL@meta.data, 'barcode')
  per_cell <- select(
    per_cell,
    barcode, Sample,
    BDevelopment_CellType_Comprehensive, BDevelopment_CellType_Comprehensive_prob
  )

  # Stack this sample below what has been collected so far.
  bind_rows(composition_Bdev, per_cell)
}
For each projected sample we will subset the following cell types:

- HSC, MPP-LMPP, LMPP, Early GMP, MLP, MLP-II
- Pre-pDC, Pre-pDC Cycling, pDC
- CLP, Pre-ProB, Pro-B VDJ, Pro-B Cycling, Large Pre-B, Small Pre-B, Immature B, Mature B
# Labels matched against `CellType_Annotation` when subsetting cells for the
# focused B-development projection.
# NOTE(review): 'HSC/MPP', 'MPP-MyLy' and 'EarlyProB' are listed here but not in
# the prose description of this subset - confirm which list is intended.
BDev_celltypes <- c(
  'HSC', 'HSC/MPP', 'MPP-MyLy', 'MPP-LMPP', 'LMPP', 'Early GMP', 'MLP', 'MLP-II',
  'Pre-pDC', 'Pre-pDC Cycling', 'pDC',
  'CLP', 'EarlyProB', 'Pre-ProB', 'Pro-B VDJ', 'Pro-B Cycling',
  'Large Pre-B', 'Small Pre-B', 'Immature B', 'Mature B'
)
# Directory holding the per-sample objects saved by the bone marrow projection.
projectedpath <- 'BALL_projections/Complete_BoneMarrowReference/'
#matrixpath = '/filtered_feature_bc_matrix/'
# Match only files ending in "_projected.rds". The previous pattern 'rds' was
# an unanchored regular expression and would also pick up any other entry whose
# name merely contains "rds".
BALL_patients_projected <- list.files(projectedpath, pattern = '_projected\\.rds$')
BALL_patients_projected
[1] "SJALL040053_D1_projected.rds" "SJALL040066_D1_projected.rds" "SJALL040069_D1_projected.rds" "SJALL040070_D1_projected.rds"
[5] "SJALL040099_D1_projected.rds" "SJALL040100_D1_projected.rds" "SJALL040103_D1_projected.rds" "SJALL040119_D1_projected.rds"
[9] "SJALL040137_D1_projected.rds" "SJALL040137_projected.rds" "SJBALL004097_D2_projected.rds" "SJBALL006_D_projected.rds"
[13] "SJBALL006_R_projected.rds" "SJBALL014876_D1_projected.rds" "SJBALL021901_D1_projected.rds" "SJBALL021947_D1_projected.rds"
[17] "SJBALL021964_D1_projected.rds" "SJBALL021968_D1_projected.rds" "SJBALL021973_D1_projected.rds" "SJBALL022020_D1_projected.rds"
[21] "SJBALL022035_D1_projected.rds" "SJBALL022052_D1_projected.rds" "SJBALL030036_D1_projected.rds" "SJBALL030040_D1_projected.rds"
[25] "SJBALL030059_D1_projected.rds" "SJBALL030072_D1_projected.rds" "SJBALL030090_D1_projected.rds" "SJBALL030095_D1_projected.rds"
[29] "SJBALL030123_D1_projected.rds" "SJBALL030123_R1_projected.rds" "SJBALL030127_D1_projected.rds" "SJBALL030145_D1_projected.rds"
[33] "SJBALL030216_D1_projected.rds" "SJBALL030247_D1_projected.rds" "SJBALL030254_D1_projected.rds" "SJBALL030276_D1_projected.rds"
[37] "SJBALL030285_D1_projected.rds" "SJBALL030313_D1_projected.rds" "SJBALL030344_D1_projected.rds" "SJBALL030370_D1_projected.rds"
[41] "SJBALL030379_D1_projected.rds" "SJBALL030414_D1_projected.rds" "SJBALL030434_D1_projected.rds" "SJBALL030491_D1_projected.rds"
[45] "SJBALL030662_D1_projected.rds" "SJBALL030718_D1_projected.rds" "SJBALL030734_D1_projected.rds" "SJBALL030762_D1_projected.rds"
[49] "SJBALL030821_D1_projected.rds" "SJBALL030871_D1_projected.rds" "SJBALL030923_D1_projected.rds" "SJBALL030971_D1_projected.rds"
[53] "SJBALL030975_D1_projected.rds" "SJBALL031052_D1_projected.rds" "SJBALL031087_D1_projected.rds" "SJBALL031128_D1_projected.rds"
[57] "SJBALL031144_D1_projected.rds" "SJBALL031168_D1_projected.rds" "SJBALL031267_D1_projected.rds" "SJBALL031281_D1_projected.rds"
[61] "SJBALL081_D_projected.rds" "SJBALL087_D_projected.rds" "SJBALL104_D_projected.rds" "SJBALL113_D_projected.rds"
[65] "SJBALL182_D_projected.rds" "SJBALL182_R_projected.rds" "SJBALL211_D_projected.rds" "SJBALL243_D_projected.rds"
[69] "SJBALL255_D_projected.rds" "SJE2A063_D_projected.rds" "SJE2A066_D_projected.rds" "SJE2A067_D_projected.rds"
[73] "SJHYPER022017_D1_projected.rds" "SJHYPO021982_D1_projected.rds" "SJHYPO117_D_projected.rds" "SJHYPO120_D_projected.rds"
[77] "SJHYPO124_D_projected.rds" "SJHYPO143_D_projected.rds" "SJHYPO146_D_projected.rds" "SJINF022043_D_projected.rds"
[81] "SJMLL006_D_projected.rds" "SJMLL009_D_projected.rds" "SJPHALL004_D_projected.rds" "SJPHALL006_D_projected.rds"
[85] "SJPHALL007_D_projected.rds" "SJPHALL010_D_projected.rds" "SJPHALL020_D_projected.rds" "SJPHALL020_R_projected.rds"
[89] "SJPHALL021_D_projected.rds"
# Peek at the first projected file name.
BALL_patients_projected[1]
[1] "SJALL040053_D1_projected.rds"
# Load the first projected sample and print its summary.
BALL <- readRDS(paste0(projectedpath, BALL_patients_projected[1]))
BALL
An object of class Seurat
36601 features across 4088 samples within 1 assay
Active assay: SymphonyQuery (36601 features, 0 variable features)
3 dimensional reductions calculated: pca, harmony, umap
# Try get_UMAPcoordinates() on the loaded sample, starting from an empty accumulator.
BALL_UMAPcoordinates_BMref <- data.frame()
BALL_UMAPcoordinates_BMref <- get_UMAPcoordinates(BALL, BALL_UMAPcoordinates_BMref)
Joining, by = "barcode"
# Inspect the resulting coordinate table.
BALL_UMAPcoordinates_BMref
# Reference UMAP coordinates of the B-development map, tagged as 'reference'
# so they can be told apart from query cells when drawn as the plot background.
refUMAP_bdev <- data.frame(Bdev_ref$umap$embedding) %>%
  rename(umap_1 = X1, umap_2 = X2) %>%
  mutate(ref_query = 'reference')

# Running tables filled in by the loop below (each starts empty).
BALL_composition_BDev <- data.frame()
BALL_UMAPcoordinates_BMref <- data.frame()
BALL_UMAPcoordinates_BDev <- data.frame()

# Re-project every bone-marrow-projected sample onto the B-development reference.
for (pt_samp in BALL_patients_projected) {
  print(paste0('Mapping: ', pt_samp))
  start_time <- Sys.time()

  # Load the object saved by the bone marrow projection step.
  BALL <- readRDS(paste0(projectedpath, pt_samp))

  # Record the bone marrow UMAP coordinates before any subsetting.
  BALL_UMAPcoordinates_BMref <- get_UMAPcoordinates(BALL, BALL_UMAPcoordinates_BMref)

  # Keep only cells annotated with one of the B-development labels.
  BALL <- subset(BALL, CellType_Annotation %in% BDev_celltypes)

  # Turn the file name into the sample id. The pattern is escaped and anchored
  # so that only a literal trailing "_projected.rds" is removed (the previous
  # pattern let "." match any character and could match mid-string).
  pt_samp <- str_replace(pt_samp, '_projected\\.rds$', '')

  # Project onto the B-development reference and transfer labels; no batch
  # variable is given.
  BALL <- map_sample_BDev(BALL, NULL)

  # Save the projected-UMAP figure for this sample.
  # NOTE(review): plot_BDev_projection() filters on `mapping_error_QC`, which
  # map_sample_BDev() does not compute - presumably the column is carried over
  # in the metadata from the bone marrow projection; confirm.
  plot_BDev_projection(BALL, pt_samp, refUMAP = refUMAP_bdev, downsample_reference = TRUE, save_folder = 'BALL_projections/Focused_BDevelopment/Figures/')

  # Collect the per-cell B-development annotations.
  BALL_composition_BDev <- get_composition_Bdev(BALL, BALL_composition_BDev)

  # Collect the B-development UMAP coordinates.
  BALL_UMAPcoordinates_BDev <- get_UMAPcoordinates(BALL, BALL_UMAPcoordinates_BDev)

  # Persist the re-projected object, then drop it before the next iteration.
  BALL %>% saveRDS(paste0("BALL_projections/Focused_BDevelopment/", pt_samp, "_projected_BDevFocus.rds"))
  rm(BALL)

  end_time <- Sys.time()
  print(end_time - start_time)
  gc()
}
[1] "Mapping: SJALL040053_D1_projected.rds"
Joining, by = "barcode"Normalizing
Scaling and synchronizing query gene expression
Found 950 reference variable genes in query dataset
Project query cells using reference gene loadings
Clustering query cells to reference centroids
Correcting query batch effects
UMAP
All done!
Warning: Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.SymphonyQuery.harmony; see ?make.names for more details on syntax validityJoining, by = "Cell"
Time difference of 50.42848 secs
[1] "Mapping: SJALL040066_D1_projected.rds"
Time difference of 1.430251 mins
[1] "Mapping: SJALL040069_D1_projected.rds"
Time difference of 1.463659 mins
[1] "Mapping: SJALL040070_D1_projected.rds"
Time difference of 48.96805 secs
[1] "Mapping: SJALL040099_D1_projected.rds"
Time difference of 1.069611 mins
[1] "Mapping: SJALL040100_D1_projected.rds"
Time difference of 1.398232 mins
[1] "Mapping: SJALL040103_D1_projected.rds"
Time difference of 56.95845 secs
[1] "Mapping: SJALL040119_D1_projected.rds"
Time difference of 2.135658 mins
[1] "Mapping: SJALL040137_D1_projected.rds"
Time difference of 1.293413 mins
[1] "Mapping: SJALL040137_projected.rds"
Time difference of 1.134639 mins
[1] "Mapping: SJBALL004097_D2_projected.rds"
Time difference of 33.78636 secs
[1] "Mapping: SJBALL006_D_projected.rds"
Time difference of 1.677079 mins
[1] "Mapping: SJBALL006_R_projected.rds"
Time difference of 59.21684 secs
[1] "Mapping: SJBALL014876_D1_projected.rds"
Time difference of 23.18006 secs
[1] "Mapping: SJBALL021901_D1_projected.rds"
Time difference of 1.042573 mins
[1] "Mapping: SJBALL021947_D1_projected.rds"
Time difference of 50.92988 secs
[1] "Mapping: SJBALL021964_D1_projected.rds"
Time difference of 1.228102 mins
[1] "Mapping: SJBALL021968_D1_projected.rds"
Time difference of 1.3662 mins
[1] "Mapping: SJBALL021973_D1_projected.rds"
Time difference of 1.486559 mins
[1] "Mapping: SJBALL022020_D1_projected.rds"
Time difference of 40.7226 secs
[1] "Mapping: SJBALL022035_D1_projected.rds"
Time difference of 32.62492 secs
[1] "Mapping: SJBALL022052_D1_projected.rds"
Time difference of 1.361965 mins
[1] "Mapping: SJBALL030036_D1_projected.rds"
Time difference of 36.27694 secs
[1] "Mapping: SJBALL030040_D1_projected.rds"
Time difference of 2.002704 mins
[1] "Mapping: SJBALL030059_D1_projected.rds"
Time difference of 46.49931 secs
[1] "Mapping: SJBALL030072_D1_projected.rds"
Time difference of 1.35629 mins
[1] "Mapping: SJBALL030090_D1_projected.rds"
Time difference of 1.083832 mins
[1] "Mapping: SJBALL030095_D1_projected.rds"
Time difference of 1.236406 mins
[1] "Mapping: SJBALL030123_D1_projected.rds"
Time difference of 54.29487 secs
[1] "Mapping: SJBALL030123_R1_projected.rds"
Time difference of 1.401866 mins
[1] "Mapping: SJBALL030127_D1_projected.rds"
Time difference of 1.179053 mins
[1] "Mapping: SJBALL030145_D1_projected.rds"
Time difference of 1.150743 mins
[1] "Mapping: SJBALL030216_D1_projected.rds"
Time difference of 1.328065 mins
[1] "Mapping: SJBALL030247_D1_projected.rds"
Time difference of 1.455484 mins
[1] "Mapping: SJBALL030254_D1_projected.rds"
Time difference of 1.091389 mins
[1] "Mapping: SJBALL030276_D1_projected.rds"
Time difference of 1.152758 mins
[1] "Mapping: SJBALL030285_D1_projected.rds"
Time difference of 1.828319 mins
[1] "Mapping: SJBALL030313_D1_projected.rds"
Time difference of 1.375607 mins
[1] "Mapping: SJBALL030344_D1_projected.rds"
Time difference of 1.470846 mins
[1] "Mapping: SJBALL030370_D1_projected.rds"
Time difference of 1.280466 mins
[1] "Mapping: SJBALL030379_D1_projected.rds"
Time difference of 1.024783 mins
[1] "Mapping: SJBALL030414_D1_projected.rds"
Time difference of 39.38767 secs
[1] "Mapping: SJBALL030434_D1_projected.rds"
Time difference of 9.560939 secs
[1] "Mapping: SJBALL030491_D1_projected.rds"
Time difference of 1.428839 mins
[1] "Mapping: SJBALL030662_D1_projected.rds"
Time difference of 1.326386 mins
[1] "Mapping: SJBALL030718_D1_projected.rds"
Time difference of 1.444446 mins
[1] "Mapping: SJBALL030734_D1_projected.rds"
Time difference of 1.640176 mins
[1] "Mapping: SJBALL030762_D1_projected.rds"
Time difference of 58.13085 secs
[1] "Mapping: SJBALL030821_D1_projected.rds"
Time difference of 33.09449 secs
[1] "Mapping: SJBALL030871_D1_projected.rds"
Time difference of 49.77356 secs
[1] "Mapping: SJBALL030923_D1_projected.rds"
Time difference of 1.174341 mins
[1] "Mapping: SJBALL030971_D1_projected.rds"
Time difference of 1.075251 mins
[1] "Mapping: SJBALL030975_D1_projected.rds"
Time difference of 1.405576 mins
[1] "Mapping: SJBALL031052_D1_projected.rds"
Time difference of 1.013604 mins
[1] "Mapping: SJBALL031087_D1_projected.rds"
Time difference of 55.91589 secs
[1] "Mapping: SJBALL031128_D1_projected.rds"
Time difference of 1.540495 mins
[1] "Mapping: SJBALL031144_D1_projected.rds"
Time difference of 1.110498 mins
[1] "Mapping: SJBALL031168_D1_projected.rds"
Time difference of 1.442441 mins
[1] "Mapping: SJBALL031267_D1_projected.rds"
Time difference of 1.035721 mins
[1] "Mapping: SJBALL031281_D1_projected.rds"
Time difference of 1.137457 mins
[1] "Mapping: SJBALL081_D_projected.rds"
Time difference of 1.930735 mins
[1] "Mapping: SJBALL087_D_projected.rds"
Time difference of 1.210253 mins
[1] "Mapping: SJBALL104_D_projected.rds"
Time difference of 1.440626 mins
[1] "Mapping: SJBALL113_D_projected.rds"
Time difference of 1.132059 mins
[1] "Mapping: SJBALL182_D_projected.rds"
Time difference of 1.032651 mins
[1] "Mapping: SJBALL182_R_projected.rds"
Time difference of 49.34748 secs
[1] "Mapping: SJBALL211_D_projected.rds"
Time difference of 1.015188 mins
[1] "Mapping: SJBALL243_D_projected.rds"
Time difference of 1.21827 mins
[1] "Mapping: SJBALL255_D_projected.rds"
Time difference of 1.164521 mins
[1] "Mapping: SJE2A063_D_projected.rds"
Time difference of 1.388237 mins
[1] "Mapping: SJE2A066_D_projected.rds"
Time difference of 1.734002 mins
[1] "Mapping: SJE2A067_D_projected.rds"
Time difference of 37.33951 secs
[1] "Mapping: SJHYPER022017_D1_projected.rds"
Time difference of 2.039134 mins
[1] "Mapping: SJHYPO021982_D1_projected.rds"
Time difference of 1.872619 mins
[1] "Mapping: SJHYPO117_D_projected.rds"
Time difference of 1.211332 mins
[1] "Mapping: SJHYPO120_D_projected.rds"
Time difference of 1.450417 mins
[1] "Mapping: SJHYPO124_D_projected.rds"
Time difference of 39.14564 secs
[1] "Mapping: SJHYPO143_D_projected.rds"
Time difference of 25.34178 secs
[1] "Mapping: SJHYPO146_D_projected.rds"
Time difference of 41.4173 secs
[1] "Mapping: SJINF022043_D_projected.rds"
Time difference of 1.129802 mins
[1] "Mapping: SJMLL006_D_projected.rds"
Time difference of 1.017289 mins
[1] "Mapping: SJMLL009_D_projected.rds"
Time difference of 1.613155 mins
[1] "Mapping: SJPHALL004_D_projected.rds"
Time difference of 1.20073 mins
[1] "Mapping: SJPHALL006_D_projected.rds"
Time difference of 1.010509 mins
[1] "Mapping: SJPHALL007_D_projected.rds"
Time difference of 1.488113 mins
[1] "Mapping: SJPHALL010_D_projected.rds"
Time difference of 34.97153 secs
[1] "Mapping: SJPHALL020_D_projected.rds"
Time difference of 1.885798 mins
[1] "Mapping: SJPHALL020_R_projected.rds"
Time difference of 1.646972 mins
[1] "Mapping: SJPHALL021_D_projected.rds"
Time difference of 1.869184 mins
# save cell annotations
BALL_composition_BDev %>% write_csv('BALL_projections/Focused_BDevelopment/BALL_celltype_annotations_Bdevelopment.csv')
# save UMAP coordinates for FullReference and BDevelopment
# (each table is written once; the verbatim repeat of these two writes was redundant)
BALL_UMAPcoordinates_BMref %>% write_csv('BALL_projections/Complete_BoneMarrowReference/BALL_UMAPcoordinates_FullReference.csv')
BALL_UMAPcoordinates_BDev %>% write_csv('BALL_projections/Focused_BDevelopment/BALL_UMAPcoordinates_Bdevelopment.csv')
# Display-only: the renamed table is shown but not assigned anywhere.
BALL_UMAPcoordinates_BMref %>% dplyr::rename(FullReference_UMAP1 = umap_1, FullReference_UMAP2 = umap_2)
# Display-only: umap_1 is negated before renaming, mirroring the `x = -umap_1`
# axis used in plot_BDev_projection(); the result is not assigned anywhere.
BALL_UMAPcoordinates_BDev %>% mutate(umap_1 = -umap_1) %>% dplyr::rename(BDevelopment_UMAP1 = umap_1, BDevelopment_UMAP2 = umap_2)
# Count rows per value of `Directory`, sorted ascending.
# NOTE(review): `BALL_composition` and its `Directory` column are not created in
# the visible part of this notebook - confirm where they come from.
BALL_composition %>% pull(Directory) %>% table() %>% sort()
.
SJBALL030434_D1 SJBALL014876_D1 SJHYPO143_D SJBALL030821_D1 SJBALL022035_D1 SJBALL004097_D2 SJPHALL010_D SJBALL022020_D1 SJE2A067_D
1165 1729 2121 2478 2575 2978 3136 3168 3296
SJBALL030036_D1 SJHYPO124_D SJALL040053_D1 SJBALL030414_D1 SJBALL030059_D1 SJALL040070_D1 SJBALL031087_D1 SJMLL006_D SJBALL006_R
3522 3810 4088 4100 4161 4486 4536 4585 4738
SJBALL211_D SJHYPO146_D SJBALL030762_D1 SJALL040103_D1 SJBALL021901_D1 SJBALL182_D SJBALL030379_D1 SJBALL031052_D1 SJBALL030276_D1
4775 4816 4836 4853 4941 5120 5189 5253 5268
SJBALL030123_D1 SJBALL031267_D1 SJALL040099_D1 SJBALL021947_D1 SJBALL030145_D1 SJBALL030090_D1 SJBALL113_D SJBALL030871_D1 SJINF022043_D
5296 5382 5403 5474 5614 5667 5761 5767 5772
SJBALL031281_D1 SJPHALL006_D SJBALL243_D SJBALL021964_D1 SJBALL030370_D1 SJBALL030923_D1 SJPHALL004_D SJBALL030971_D1 SJBALL255_D
5787 5805 5868 5948 5952 6031 6069 6106 6194
SJALL040137 SJBALL030127_D1 SJPHALL021_D SJBALL021968_D1 SJHYPO120_D SJBALL030095_D1 SJBALL030254_D1 SJHYPO117_D SJBALL022052_D1
6202 6308 6323 6391 6420 6436 6451 6565 6603
SJBALL030662_D1 SJBALL030247_D1 SJBALL031168_D1 SJALL040137_D1 SJE2A063_D SJALL040066_D1 SJALL040100_D1 SJBALL021973_D1 SJBALL104_D
6615 6647 6672 6737 6750 6880 6897 6915 6935
SJBALL030491_D1 SJBALL030216_D1 SJMLL009_D SJBALL087_D SJBALL030975_D1 SJBALL031144_D1 SJBALL030344_D1 SJPHALL007_D SJBALL030313_D1
6936 7066 7172 7239 7253 7299 7453 7460 7466
SJPHALL020_R SJBALL006_D SJBALL030718_D1 SJBALL030123_R1 SJBALL182_R SJE2A066_D SJBALL030285_D1 SJBALL031128_D1 SJALL040069_D1
7582 7649 7674 7778 7803 7933 7991 8008 8056
SJBALL030734_D1 SJBALL030072_D1 SJBALL081_D SJPHALL020_D SJALL040119_D1 SJHYPO021982_D1 SJHYPER022017_D1 SJBALL030040_D1
8075 8199 8414 8538 9313 9330 9613 10583
Raw counts from filtered B-ALL cells were projected onto the bone marrow reference map using Symphony (Kang et al.). Filtering criteria were:

- % mito < 8
- nCount_RNA > 2500
- nFeature_RNA > 500
Cells assigned to the following populations within the B-cell development lineage:

- HSC, MPP-LMPP, LMPP, Early GMP, MLP, MLP-II
- CLP, Pre-ProB, Pro-B VDJ, Pro-B Cycling, Large Pre-B, Small Pre-B, Immature B, Mature B
- Pre-pDC, Pre-pDC Cycling, pDC

were subsetted and projected onto the cross-ontogeny map of B-cell development using Symphony, to refine cell type classification along the B-cell lineage.